1   package org.apache.lucene.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.IOException;
21  import java.util.Arrays;
22  import java.util.Collection;
23  import java.util.Collections;
24  
25  import org.apache.lucene.store.DataInput;
26  import org.apache.lucene.store.DataOutput;
27  import org.apache.lucene.store.IndexInput;
28  
29  /** Represents a logical byte[] as a series of pages.  You
30   *  can write-once into the logical byte[] (append only),
31   *  using copy, and then retrieve slices (BytesRef) into it
32   *  using fill.
33   *
34   * @lucene.internal
35   **/
36  // TODO: refactor this, byteblockpool, fst.bytestore, and any
37  // other "shift/mask big arrays". there are too many of these classes!
38  public final class PagedBytes implements Accountable {
39    private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(PagedBytes.class);
40    private byte[][] blocks = new byte[16][];
41    private int numBlocks;
42    // TODO: these are unused?
43    private final int blockSize;
44    private final int blockBits;
45    private final int blockMask;
46    private boolean didSkipBytes;
47    private boolean frozen;
48    private int upto;
49    private byte[] currentBlock;
50    private final long bytesUsedPerBlock;
51  
52    private static final byte[] EMPTY_BYTES = new byte[0];
53  
54    /** Provides methods to read BytesRefs from a frozen
55     *  PagedBytes.
56     *
57     * @see #freeze */
58    public final static class Reader implements Accountable {
59      private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Reader.class);
60      private final byte[][] blocks;
61      private final int blockBits;
62      private final int blockMask;
63      private final int blockSize;
64      private final long bytesUsedPerBlock;
65  
66      private Reader(PagedBytes pagedBytes) {
67        blocks = Arrays.copyOf(pagedBytes.blocks, pagedBytes.numBlocks);
68        blockBits = pagedBytes.blockBits;
69        blockMask = pagedBytes.blockMask;
70        blockSize = pagedBytes.blockSize;
71        bytesUsedPerBlock = pagedBytes.bytesUsedPerBlock;
72      }
73  
74      /**
75       * Gets a slice out of {@link PagedBytes} starting at <i>start</i> with a
76       * given length. Iff the slice spans across a block border this method will
77       * allocate sufficient resources and copy the paged data.
78       * <p>
79       * Slices spanning more than two blocks are not supported.
80       * </p>
81       * @lucene.internal 
82       **/
83      public void fillSlice(BytesRef b, long start, int length) {
84        assert length >= 0: "length=" + length;
85        assert length <= blockSize+1: "length=" + length;
86        b.length = length;
87        if (length == 0) {
88          return;
89        }
90        final int index = (int) (start >> blockBits);
91        final int offset = (int) (start & blockMask);
92        if (blockSize - offset >= length) {
93          // Within block
94          b.bytes = blocks[index];
95          b.offset = offset;
96        } else {
97          // Split
98          b.bytes = new byte[length];
99          b.offset = 0;
100         System.arraycopy(blocks[index], offset, b.bytes, 0, blockSize-offset);
101         System.arraycopy(blocks[1+index], 0, b.bytes, blockSize-offset, length-(blockSize-offset));
102       }
103     }
104     
105     /**
106      * Reads length as 1 or 2 byte vInt prefix, starting at <i>start</i>.
107      * <p>
108      * <b>Note:</b> this method does not support slices spanning across block
109      * borders.
110      * </p>
111      * 
112      * @lucene.internal
113      **/
114     // TODO: this really needs to be refactored into fieldcacheimpl
115     public void fill(BytesRef b, long start) {
116       final int index = (int) (start >> blockBits);
117       final int offset = (int) (start & blockMask);
118       final byte[] block = b.bytes = blocks[index];
119 
120       if ((block[offset] & 128) == 0) {
121         b.length = block[offset];
122         b.offset = offset+1;
123       } else {
124         b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
125         b.offset = offset+2;
126         assert b.length > 0;
127       }
128     }
129 
130     @Override
131     public long ramBytesUsed() {
132       long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);
133       if (blocks.length > 0) {
134         size += (blocks.length - 1) * bytesUsedPerBlock;
135         size += RamUsageEstimator.sizeOf(blocks[blocks.length - 1]);
136       }
137       return size;
138     }
139     
140     @Override
141     public Collection<Accountable> getChildResources() {
142       return Collections.emptyList();
143     }
144 
145     @Override
146     public String toString() {
147       return "PagedBytes(blocksize=" + blockSize + ")";
148     }
149   }
150 
151   /** 1&lt;&lt;blockBits must be bigger than biggest single
152    *  BytesRef slice that will be pulled */
153   public PagedBytes(int blockBits) {
154     assert blockBits > 0 && blockBits <= 31 : blockBits;
155     this.blockSize = 1 << blockBits;
156     this.blockBits = blockBits;
157     blockMask = blockSize-1;
158     upto = blockSize;
159     bytesUsedPerBlock = RamUsageEstimator.alignObjectSize(blockSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
160     numBlocks = 0;
161   }
162 
163   private void addBlock(byte[] block) {
164     if (blocks.length == numBlocks) {
165       blocks = Arrays.copyOf(blocks, ArrayUtil.oversize(numBlocks, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
166     }
167     blocks[numBlocks++] = block;
168   }
169 
170   /** Read this many bytes from in */
171   public void copy(IndexInput in, long byteCount) throws IOException {
172     while (byteCount > 0) {
173       int left = blockSize - upto;
174       if (left == 0) {
175         if (currentBlock != null) {
176           addBlock(currentBlock);
177         }
178         currentBlock = new byte[blockSize];
179         upto = 0;
180         left = blockSize;
181       }
182       if (left < byteCount) {
183         in.readBytes(currentBlock, upto, left, false);
184         upto = blockSize;
185         byteCount -= left;
186       } else {
187         in.readBytes(currentBlock, upto, (int) byteCount, false);
188         upto += byteCount;
189         break;
190       }
191     }
192   }
193 
194   /** Copy BytesRef in, setting BytesRef out to the result.
195    * Do not use this if you will use freeze(true).
196    * This only supports bytes.length &lt;= blockSize */
197   public void copy(BytesRef bytes, BytesRef out) {
198     int left = blockSize - upto;
199     if (bytes.length > left || currentBlock==null) {
200       if (currentBlock != null) {
201         addBlock(currentBlock);
202         didSkipBytes = true;
203       }
204       currentBlock = new byte[blockSize];
205       upto = 0;
206       left = blockSize;
207       assert bytes.length <= blockSize;
208       // TODO: we could also support variable block sizes
209     }
210 
211     out.bytes = currentBlock;
212     out.offset = upto;
213     out.length = bytes.length;
214 
215     System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
216     upto += bytes.length;
217   }
218 
219   /** Commits final byte[], trimming it if necessary and if trim=true */
220   public Reader freeze(boolean trim) {
221     if (frozen) {
222       throw new IllegalStateException("already frozen");
223     }
224     if (didSkipBytes) {
225       throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used");
226     }
227     if (trim && upto < blockSize) {
228       final byte[] newBlock = new byte[upto];
229       System.arraycopy(currentBlock, 0, newBlock, 0, upto);
230       currentBlock = newBlock;
231     }
232     if (currentBlock == null) {
233       currentBlock = EMPTY_BYTES;
234     }
235     addBlock(currentBlock);
236     frozen = true;
237     currentBlock = null;
238     return new PagedBytes.Reader(this);
239   }
240 
241   public long getPointer() {
242     if (currentBlock == null) {
243       return 0;
244     } else {
245       return (numBlocks * ((long) blockSize)) + upto;
246     }
247   }
248 
249   @Override
250   public long ramBytesUsed() {
251     long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);;
252     if (numBlocks > 0) {
253       size += (numBlocks - 1) * bytesUsedPerBlock;
254       size += RamUsageEstimator.sizeOf(blocks[numBlocks - 1]);
255     }
256     if (currentBlock != null) {
257       size += RamUsageEstimator.sizeOf(currentBlock);
258     }
259     return size;
260   }
261   
262   @Override
263   public Collection<Accountable> getChildResources() {
264     return Collections.emptyList();
265   }
266 
267   /** Copy bytes in, writing the length as a 1 or 2 byte
268    *  vInt prefix. */
269   // TODO: this really needs to be refactored into fieldcacheimpl!
270   public long copyUsingLengthPrefix(BytesRef bytes) {
271     if (bytes.length >= 32768) {
272       throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
273     }
274 
275     if (upto + bytes.length + 2 > blockSize) {
276       if (bytes.length + 2 > blockSize) {
277         throw new IllegalArgumentException("block size " + blockSize + " is too small to store length " + bytes.length + " bytes");
278       }
279       if (currentBlock != null) {
280         addBlock(currentBlock);     
281       }
282       currentBlock = new byte[blockSize];
283       upto = 0;
284     }
285 
286     final long pointer = getPointer();
287 
288     if (bytes.length < 128) {
289       currentBlock[upto++] = (byte) bytes.length;
290     } else {
291       currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8));
292       currentBlock[upto++] = (byte) (bytes.length & 0xff);
293     }
294     System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
295     upto += bytes.length;
296 
297     return pointer;
298   }
299 
300   public final class PagedBytesDataInput extends DataInput {
301     private int currentBlockIndex;
302     private int currentBlockUpto;
303     private byte[] currentBlock;
304 
305     PagedBytesDataInput() {
306       currentBlock = blocks[0];
307     }
308 
309     @Override
310     public PagedBytesDataInput clone() {
311       PagedBytesDataInput clone = getDataInput();
312       clone.setPosition(getPosition());
313       return clone;
314     }
315 
316     /** Returns the current byte position. */
317     public long getPosition() {
318       return (long) currentBlockIndex * blockSize + currentBlockUpto;
319     }
320   
321     /** Seek to a position previously obtained from
322      *  {@link #getPosition}. */
323     public void setPosition(long pos) {
324       currentBlockIndex = (int) (pos >> blockBits);
325       currentBlock = blocks[currentBlockIndex];
326       currentBlockUpto = (int) (pos & blockMask);
327     }
328 
329     @Override
330     public byte readByte() {
331       if (currentBlockUpto == blockSize) {
332         nextBlock();
333       }
334       return currentBlock[currentBlockUpto++];
335     }
336 
337     @Override
338     public void readBytes(byte[] b, int offset, int len) {
339       assert b.length >= offset + len;
340       final int offsetEnd = offset + len;
341       while (true) {
342         final int blockLeft = blockSize - currentBlockUpto;
343         final int left = offsetEnd - offset;
344         if (blockLeft < left) {
345           System.arraycopy(currentBlock, currentBlockUpto,
346                            b, offset,
347                            blockLeft);
348           nextBlock();
349           offset += blockLeft;
350         } else {
351           // Last block
352           System.arraycopy(currentBlock, currentBlockUpto,
353                            b, offset,
354                            left);
355           currentBlockUpto += left;
356           break;
357         }
358       }
359     }
360 
361     private void nextBlock() {
362       currentBlockIndex++;
363       currentBlockUpto = 0;
364       currentBlock = blocks[currentBlockIndex];
365     }
366   }
367 
368   public final class PagedBytesDataOutput extends DataOutput {
369     @Override
370     public void writeByte(byte b) {
371       if (upto == blockSize) {
372         if (currentBlock != null) {
373           addBlock(currentBlock);
374         }
375         currentBlock = new byte[blockSize];
376         upto = 0;
377       }
378       currentBlock[upto++] = b;
379     }
380 
381     @Override
382     public void writeBytes(byte[] b, int offset, int length) {
383       assert b.length >= offset + length;
384       if (length == 0) {
385         return;
386       }
387 
388       if (upto == blockSize) {
389         if (currentBlock != null) {
390           addBlock(currentBlock);
391         }
392         currentBlock = new byte[blockSize];
393         upto = 0;
394       }
395           
396       final int offsetEnd = offset + length;
397       while(true) {
398         final int left = offsetEnd - offset;
399         final int blockLeft = blockSize - upto;
400         if (blockLeft < left) {
401           System.arraycopy(b, offset, currentBlock, upto, blockLeft);
402           addBlock(currentBlock);
403           currentBlock = new byte[blockSize];
404           upto = 0;
405           offset += blockLeft;
406         } else {
407           // Last block
408           System.arraycopy(b, offset, currentBlock, upto, left);
409           upto += left;
410           break;
411         }
412       }
413     }
414 
415     /** Return the current byte position. */
416     public long getPosition() {
417       return getPointer();
418     }
419   }
420 
421   /** Returns a DataInput to read values from this
422    *  PagedBytes instance. */
423   public PagedBytesDataInput getDataInput() {
424     if (!frozen) {
425       throw new IllegalStateException("must call freeze() before getDataInput");
426     }
427     return new PagedBytesDataInput();
428   }
429 
430   /** Returns a DataOutput that you may use to write into
431    *  this PagedBytes instance.  If you do this, you should
432    *  not call the other writing methods (eg, copy);
433    *  results are undefined. */
434   public PagedBytesDataOutput getDataOutput() {
435     if (frozen) {
436       throw new IllegalStateException("cannot get DataOutput after freeze()");
437     }
438     return new PagedBytesDataOutput();
439   }
440 }